图表说明 | Figure Legend:

  • A 面板 | Panel A: 展示了31个低氧相关基因在6个样品中的表达水平热图。颜色从蓝色(低表达)到红色(高表达)变化。样品按条件分组(正常氧气、低氧、高度低氧)。

  • B 面板 | Panel B: 散点图显示了每个基因与低氧评分的皮尔逊相关系数和对应的-log10 p值。横线代表0.05显著性水平。

  • C 面板 | Panel C: 按功能通路分组展示的基因表达情况,包括转录因子、血管生成、代谢重编程等关键通路。

  • D 面板 | Panel D: 基于表达相关性(>0.6)构建的基因调控网络。

1. 生成示例数据 | Generate Sample Data

# 设置随机种子以保证可重复性 | Set seed for reproducibility
set.seed(2025)

# 生成基因表达数据 | Generate gene expression data
genes <- c(
  "HIF1A", "HIF2A", "HIF3A", "VEGFA", "VEGFB", "VEGFC",
  "EPO", "BNIP3", "BNIP3L", "CAIX", "PHD1", "PHD2", "PHD3",
  "PGK1", "LDHA", "SLC16A1", "GLUT1", "GLUT3", "ENO1",
  "PFKFB3", "PFKFB4", "PKM2", "PDK1", "PDK3", "GBE1",
  "NDRG1", "ANKRD37", "KDM4A", "LOX", "SERPINE1", "CTGF"
)

# 创建样品名称 | Create sample names (6个细胞系 | 6 cell lines)
samples <- paste0("Cell_", 1:6)
conditions <- c("Normoxia", "Normoxia", "Hypoxia", "Hypoxia", "Hypoxia_High", "Hypoxia_High")

# 生成表达矩阵 | Generate expression matrix
n_genes <- length(genes)
n_samples <- length(samples)

# 正常氧气条件下的基线表达 | Baseline expression under normoxia
baseline_expr <- rnorm(n_genes, mean = 5, sd = 1.5)

# 创建表达数据矩阵 | Create expression data matrix
expr_matrix <- matrix(0, nrow = n_genes, ncol = n_samples)
rownames(expr_matrix) <- genes
colnames(expr_matrix) <- samples

for (i in 1:n_genes) {
  for (j in 1:n_samples) {
    if (j <= 2) {
      # 正常氧气条件 | Normoxia condition
      expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 0, sd = 0.5)
    } else if (j <= 4) {
      # 低氧条件 | Hypoxia condition (增加表达 | increased expression)
      expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 3, sd = 1)
    } else {
      # 高度低氧条件 | High hypoxia condition (进一步增加 | further increased)
      expr_matrix[i, j] <- baseline_expr[i] + rnorm(1, mean = 5, sd = 1.2)
    }
  }
}

# 确保没有负值 | Ensure no negative values
expr_matrix[expr_matrix < 0] <- 0.1

# 计算低氧评分 | Calculate hypoxia scores
hypoxia_cores <- c(
  "HIF1A", "VEGFA", "EPO", "CAIX", "PHD2", "LDHA", "GLUT1", "PGK1"
)

hypoxia_score <- colMeans(expr_matrix[rownames(expr_matrix) %in% hypoxia_cores, ])

# 创建元数据 | Create metadata
sample_info <- data.frame(
  sample = samples,
  condition = conditions,
  hypoxia_score = hypoxia_score,
  stringsAsFactors = FALSE
)

head(expr_matrix)
##         Cell_1   Cell_2    Cell_3   Cell_4    Cell_5    Cell_6
## HIF1A 6.841070 5.295926  9.519951 8.296882  9.412157 11.271634
## HIF2A 4.312582 5.509402  9.024931 9.326589 11.808184  9.320163
## HIF3A 5.979455 5.395156 10.292995 8.718993 11.315257 12.205211
## VEGFA 7.161491 7.094180  8.680724 9.746536 13.214419 12.339775
## VEGFB 6.090059 5.870653  7.724884 7.864078 10.502715 11.269946
## VEGFC 5.075269 4.995429  9.128781 6.608034  6.814880  8.672643
head(sample_info)
##        sample    condition hypoxia_score
## Cell_1 Cell_1     Normoxia      5.736570
## Cell_2 Cell_2     Normoxia      5.370170
## Cell_3 Cell_3      Hypoxia      8.260937
## Cell_4 Cell_4      Hypoxia      8.438362
## Cell_5 Cell_5 Hypoxia_High     10.194875
## Cell_6 Cell_6 Hypoxia_High     10.564447

2. 基因与低氧评分的关联分析 | Gene-Hypoxia Score Association Analysis

# 计算每个基因与低氧评分的相关系数 | Calculate correlation between genes and hypoxia scores
correlations <- apply(expr_matrix, 1, function(x) {
  cor(x, sample_info$hypoxia_score)
})

# 计算p值 | Calculate p-values
p_values <- apply(expr_matrix, 1, function(x) {
  cor.test(x, sample_info$hypoxia_score)$p.value
})

# 创建结果数据框 | Create results dataframe
gene_hypoxia_assoc <- data.frame(
  gene = names(correlations),
  correlation = correlations,
  p_value = p_values,
  log10_pval = -log10(p_values),
  stringsAsFactors = FALSE
) %>%
  arrange(desc(abs(correlation)))

head(gene_hypoxia_assoc, 10)
##            gene correlation      p_value log10_pval
## ANKRD37 ANKRD37   0.9889460 0.0001826115   3.738472
## PHD2       PHD2   0.9832064 0.0004206692   3.376059
## PDK3       PDK3   0.9793637 0.0006343938   3.197641
## LDHA       LDHA   0.9779953 0.0007209834   3.142075
## HIF3A     HIF3A   0.9765273 0.0008199829   3.086195
## KDM4A     KDM4A   0.9725112 0.0011230647   2.949595
## VEGFB     VEGFB   0.9696857 0.0013645035   2.865025
## PFKFB4   PFKFB4   0.9665635 0.0016583129   2.780334
## CTGF       CTGF   0.9643244 0.0018864175   2.724362
## EPO         EPO   0.9633960 0.0019852625   2.702182

3. 左侧:基因表达热图与散点图 | Left Panel: Heatmap and Scatterplot

# 标准化表达矩阵用于热图 | Normalize expression matrix for heatmap
expr_scaled <- t(scale(t(expr_matrix)))

# 准备绘图数据 | Prepare plotting data
heatmap_data <- expr_scaled %>%
  as.data.frame() %>%
  rownames_to_column("gene") %>%
  pivot_longer(-gene, names_to = "sample", values_to = "expression")

# 添加条件信息 | Add condition information
heatmap_data <- heatmap_data %>%
  left_join(sample_info, by = c("sample" = "sample"))

# 创建热图 | Create heatmap
p_heatmap <- ggplot(heatmap_data, aes(x = sample, y = gene, fill = expression)) +
  geom_tile(color = "white", size = 0.5) +
  scale_fill_gradient2(
    low = "blue", mid = "white", high = "red",
    name = "表达量\n(Expression)"
  ) +
  facet_grid(. ~ condition, scales = "free_x", space = "free_x") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    axis.text.y = element_text(size = 8),
    legend.position = "bottom"
  ) +
  labs(
    title = "A. 基因表达热图 | Gene Expression Heatmap",
    x = "样品 | Sample",
    y = "基因 | Gene"
  )

print(p_heatmap)

4. 散点图:相关性分析 | Scatterplot: Correlation Analysis

# 绘制基因与低氧评分的相关性 | Plot gene-hypoxia correlation
p_scatter <- ggplot(gene_hypoxia_assoc, aes(x = correlation, y = -log10(p_value))) +
  geom_point(aes(color = correlation, size = abs(correlation)), alpha = 0.7) +
  scale_color_gradient2(
    low = "blue", mid = "gray", high = "red",
    name = "相关系数\n(Correlation)"
  ) +
  scale_size_continuous(name = "绝对值\n(|correlation|)") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed", color = "black", alpha = 0.5) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black", alpha = 0.5) +
  # 标注关键基因 | Annotate key genes
  ggrepel::geom_text_repel(
    data = gene_hypoxia_assoc %>% head(10),
    aes(label = gene),
    size = 3
  ) +
  theme_minimal() +
  labs(
    title = "B. 基因与低氧评分的相关性 | Gene-Hypoxia Score Correlation",
    x = "皮尔逊相关系数 | Pearson Correlation Coefficient",
    y = "-log10(p值) | -log10(p-value)"
  ) +
  theme(
    legend.position = "right",
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold")
  )

print(p_scatter)

5. 右侧:通路与靶点分析 | Right Panel: Pathway and Target Analysis

# 定义低氧响应通路与基因 | Define hypoxia response pathways and genes
pathways <- list(
  "转录因子" = c("HIF1A", "HIF2A", "HIF3A"),
  "血管生成" = c("VEGFA", "VEGFB", "VEGFC", "ANGPT1"),
  "造血" = c("EPO"),
  "代谢重编程" = c("LDHA", "PKM2", "PFKFB3", "GLUT1", "PGK1"),
  "缺氧标志" = c("CAIX", "BNIP3", "BNIP3L"),
  "原癌基因激活" = c("MYC", "KRAS", "BRAF"),
  "免疫检查点" = c("PD-L1", "PD-L2", "LAG3"),
  "治疗靶点" = c("EGFR", "ALK", "ROS1", "BRAF", "KRAS")
)

# Transcription factors
# Angiogenesis
# Hematopoiesis
# Metabolic reprogramming
# Hypoxia markers
# Oncogene activation
# Immune checkpoints
# Therapeutic targets

# 创建通路数据 | Create pathway data
pathway_data <- data.frame(
  gene = character(),
  pathway = character(),
  expression_mean = numeric(),
  target_type = character(),
  stringsAsFactors = FALSE
)

for (pathway in names(pathways)) {
  genes_in_pathway <- pathways[[pathway]]
  for (gene in genes_in_pathway) {
    if (gene %in% rownames(expr_matrix)) {
      expr_mean <- mean(expr_matrix[gene, 3:6])  # 低氧条件的平均表达
    } else {
      expr_mean <- runif(1, 3, 8)  # 为不在矩阵中的基因生成随机值
    }
    
    target_type <- ifelse(
      pathway %in% c("免疫检查点", "治疗靶点"),
      "Drug Target",
      "Biomarker"
    )
    # Drug Target | Biomarker
    
    pathway_data <- rbind(pathway_data, data.frame(
      gene = gene,
      pathway = pathway,
      expression_mean = expr_mean,
      target_type = target_type,
      stringsAsFactors = FALSE
    ))
  }
}

# 按通路和表达量排序 | Sort by pathway and expression
pathway_data <- pathway_data %>%
  arrange(pathway, desc(expression_mean))

# 绘制通路图 | Create pathway visualization
p_pathway <- ggplot(pathway_data, aes(x = expression_mean, y = reorder(gene, expression_mean))) +
  geom_point(aes(color = pathway, size = expression_mean), alpha = 0.8) +
  geom_segment(aes(xend = 0, yend = gene, color = pathway), alpha = 0.3) +
  facet_wrap(~pathway, scales = "free_y", ncol = 1) +
  scale_color_brewer(palette = "Set2", name = "通路 | Pathway") +
  scale_size_continuous(name = "平均表达量\n(Mean Expression)") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10, face = "bold"),
    axis.text.y = element_text(size = 9),
    legend.position = "bottom"
  ) +
  labs(
    title = "C. 低氧响应通路与基因 | Hypoxia Response Pathways and Genes",
    x = "平均表达量 | Mean Expression (Hypoxia)",
    y = "基因 | Gene"
  )

print(p_pathway)

6. 综合分析:基因调控网络 | Integrated Analysis: Gene Regulatory Network

# 创建基因相互作用网络 | Create gene interaction network
# 基于表达相关性 | Based on expression correlation

# 计算基因之间的相关性 | Calculate correlations between genes
gene_cor <- cor(t(expr_matrix[1:15, ]))  # 使用前15个基因 | Use first 15 genes

# 转换为网络数据 | Convert to network data
gene_cor[lower.tri(gene_cor, diag = TRUE)] <- 0
network_edges <- which(abs(gene_cor) > 0.6, arr.ind = TRUE) %>%
  as.data.frame() %>%
  setNames(c("from", "to")) %>%
  mutate(
    from = rownames(gene_cor)[from],
    to = rownames(gene_cor)[to]
  )

# 创建图对象 | Create igraph object
if (nrow(network_edges) > 0) {
  g <- graph_from_data_frame(network_edges, directed = FALSE)
  
  p_network <- ggraph(g, layout = "fr") +
    geom_edge_link(alpha = 0.3, color = "gray") +
    geom_node_point(aes(size = 8), color = "steelblue", alpha = 0.8) +
    geom_node_text(aes(label = name), repel = TRUE, size = 4) +
    theme_graph() +
    labs(title = "D. 基因调控网络 | Gene Regulatory Network")
  
  print(p_network)
}

7. 汇总统计 | Summary Statistics

# 汇总信息 | Summary information
cat("=== 数据统计摘要 | Data Summary ===\n")
## === 数据统计摘要 | Data Summary ===
cat("基因数量 | Number of genes:", nrow(expr_matrix), "\n")
## 基因数量 | Number of genes: 31
cat("样品数量 | Number of samples:", ncol(expr_matrix), "\n")
## 样品数量 | Number of samples: 6
cat("条件类型 | Conditions:", paste(unique(sample_info$condition), collapse = ", "), "\n\n")
## 条件类型 | Conditions: Normoxia, Hypoxia, Hypoxia_High
cat("=== 前10个与低氧评分最相关的基因 | Top 10 Genes Correlated with Hypoxia Score ===\n")
## === 前10个与低氧评分最相关的基因 | Top 10 Genes Correlated with Hypoxia Score ===
print(head(gene_hypoxia_assoc, 10))
##            gene correlation      p_value log10_pval
## ANKRD37 ANKRD37   0.9889460 0.0001826115   3.738472
## PHD2       PHD2   0.9832064 0.0004206692   3.376059
## PDK3       PDK3   0.9793637 0.0006343938   3.197641
## LDHA       LDHA   0.9779953 0.0007209834   3.142075
## HIF3A     HIF3A   0.9765273 0.0008199829   3.086195
## KDM4A     KDM4A   0.9725112 0.0011230647   2.949595
## VEGFB     VEGFB   0.9696857 0.0013645035   2.865025
## PFKFB4   PFKFB4   0.9665635 0.0016583129   2.780334
## CTGF       CTGF   0.9643244 0.0018864175   2.724362
## EPO         EPO   0.9633960 0.0019852625   2.702182
cat("\n=== 表达变异分析 | Expression Variance Analysis ===\n")
## 
## === 表达变异分析 | Expression Variance Analysis ===
cat("平均基因表达 | Mean gene expression:", round(mean(expr_matrix), 2), "\n")
## 平均基因表达 | Mean gene expression: 7.9
cat("基因表达标准差 | SD of gene expression:", round(sd(expr_matrix), 2), "\n")
## 基因表达标准差 | SD of gene expression: 2.7
cat("低氧评分范围 | Hypoxia score range:", 
    round(min(sample_info$hypoxia_score), 2), "-", 
    round(max(sample_info$hypoxia_score), 2), "\n")
## 低氧评分范围 | Hypoxia score range: 5.37 - 10.56

Session Info

sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.8.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] zh_CN.UTF-8/zh_CN.UTF-8/zh_CN.UTF-8/C/zh_CN.UTF-8/zh_CN.UTF-8
## 
## time zone: Asia/Shanghai
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggraph_2.2.2          igraph_2.1.4          circlize_0.4.16      
##  [4] ComplexHeatmap_2.24.1 gridExtra_2.3         lubridate_1.9.4      
##  [7] forcats_1.0.1         stringr_1.5.2         dplyr_1.1.4          
## [10] purrr_1.1.0           readr_2.1.5           tidyr_1.3.1          
## [13] tibble_3.3.0          tidyverse_2.0.0       ggplot2_4.0.0        
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6        shape_1.4.6.1       rjson_0.2.23       
##  [4] xfun_0.53           bslib_0.9.0         GlobalOptions_0.1.2
##  [7] ggrepel_0.9.6       tzdb_0.5.0          vctrs_0.6.5        
## [10] tools_4.5.1         generics_0.1.4      stats4_4.5.1       
## [13] parallel_4.5.1      cluster_2.1.8.1     pkgconfig_2.0.3    
## [16] RColorBrewer_1.1-3  S7_0.2.0            S4Vectors_0.46.0   
## [19] lifecycle_1.0.4     compiler_4.5.1      farver_2.1.2       
## [22] ggforce_0.5.0       graphlayouts_1.2.2  codetools_0.2-20   
## [25] clue_0.3-66         htmltools_0.5.8.1   sass_0.4.10        
## [28] yaml_2.3.10         pillar_1.11.1       crayon_1.5.3       
## [31] jquerylib_0.1.4     MASS_7.3-65         cachem_1.1.0       
## [34] viridis_0.6.5       iterators_1.0.14    foreach_1.5.2      
## [37] tidyselect_1.2.1    digest_0.6.37       stringi_1.8.7      
## [40] labeling_0.4.3      polyclip_1.10-7     fastmap_1.2.0      
## [43] colorspace_2.1-2    cli_3.6.5           magrittr_2.0.4     
## [46] tidygraph_1.3.1     withr_3.0.2         scales_1.4.0       
## [49] timechange_0.3.0    rmarkdown_2.30      matrixStats_1.5.0  
## [52] png_0.1-8           GetoptLong_1.0.5    hms_1.1.3          
## [55] memoise_2.0.1       evaluate_1.0.5      knitr_1.50         
## [58] IRanges_2.42.0      doParallel_1.0.17   viridisLite_0.4.2  
## [61] rlang_1.1.6         Rcpp_1.1.0          glue_1.8.0         
## [64] tweenr_2.0.3        BiocGenerics_0.54.0 jsonlite_2.0.0     
## [67] R6_2.6.1